Data

  1. RNAseq samples from cortical tissue
## ---- load cleaned RNA-seq count file and design file ready
load("~/Documents/code/short-term-diet/01_EdgeR_glmQLFit/data/01_data_cleaning.RData")
source("~/Documents/code/01_function/my_edgeR.R") # tidyverse library is loaded in my_edgeR.R 

out_prefix<-"Cortex.RMoutlier"
out_analysis <- "./analysis/"
out_figure <- "./figure/"

data.raw=data.raw.C
data.design=data.design.C
group=group.C

library(tidyverse)
#remove outlier sample CD.2weeks.Sed.C_17989C and get rid of all running groups (pre-determined in Cortex.Rmd)
data.raw <- data.raw %>% select(-contains("17989"),-contains("Run"))
data.design <- data.design %>% filter(!Sample_Name %in% c("17989C"), TERM_3 == "Sed")
group <- factor(data.design$Group)

Quality Control 1:

  1. Build edgeR object
  2. filtering
  3. normalization
  4. estimate dispersion
# build edgeR object
library(edgeR)
y <- DGEList(data.raw, group=group, genes=row.names(data.raw)) # must specify
options(digits=3)
y$samples
# before filtering and normalization
log.cpm <- cpm(y, log=TRUE, prior.count=2) 
boxplot(log.cpm)

hist(apply(log.cpm, 2, median), xlab="median of log2(cpm)", main="")

# attach gene symbols to edgeR object 
y<- symbols(y)
# The following package has already been detached within symbol function I wrote 
#detach("package:org.Mm.eg.db", unload=TRUE) # conflict with select function, so to detach
#detach("package:AnnotationDbi", unload=TRUE)

head(y$genes)
## dropNAsymbols
y <- y[!is.na(y$genes$Symbol),] 

# keep gene more than 1cpm in at least 2 samples 
dim(y)
[1] 23103    46
keep <- rowSums(cpm(y) > 1) >= 2 ## filter
table(keep)
keep
FALSE  TRUE 
 8774 14329 
y <- y[keep, , keep.lib.sizes=FALSE]

# after filtering and before normalization
log.cpm <- cpm(y, log=TRUE, prior.count=2) 
boxplot(log.cpm)

hist(apply(log.cpm, 2, median), xlab="median of log2(cpm)", main="")

# estimate dispersion
# design
design <- model.matrix(~0+group)
colnames(design) <- levels(group)
design
   CD.2weeks.Sed.C CD.4weeks.Sed.C CD.6weeks.Sed.C CD.8weeks.Sed.C
1                0               0               0               0
2                0               0               0               0
3                0               0               0               0
4                0               0               0               0
5                0               0               0               0
6                0               0               0               0
7                0               0               0               0
8                0               0               0               0
9                0               0               0               0
10               0               0               0               0
11               0               0               0               0
12               0               0               0               0
13               0               0               0               0
14               0               0               0               0
15               0               0               0               0
16               0               0               0               0
17               0               0               0               0
18               0               0               0               0
19               1               0               0               0
20               1               0               0               0
21               1               0               0               0
22               1               0               0               0
23               0               1               0               0
24               0               1               0               0
25               0               1               0               0
26               0               1               0               0
27               0               1               0               0
28               0               0               1               0
29               0               0               1               0
30               0               0               1               0
31               0               0               1               0
32               0               0               0               1
33               0               0               0               1
34               0               0               0               0
35               0               0               0               0
36               0               0               0               1
37               0               0               0               1
38               0               0               0               0
39               0               0               0               0
40               0               0               0               0
41               0               0               0               0
42               1               0               0               0
43               0               1               0               0
44               0               0               1               0
45               0               0               1               0
46               0               0               0               1
   WD.2weeks.Sed.C WD.4weeks.Sed.C WD.6weeks.Sed.C WD.8weeks.Sed.C
1                1               0               0               0
2                1               0               0               0
3                1               0               0               0
4                1               0               0               0
5                0               1               0               0
6                0               1               0               0
7                0               1               0               0
8                0               1               0               0
9                0               1               0               0
10               0               0               1               0
11               0               0               1               0
12               0               0               1               0
13               0               0               1               0
14               0               0               1               0
15               0               0               0               1
16               0               0               0               1
17               0               0               0               1
18               0               0               0               1
19               0               0               0               0
20               0               0               0               0
21               0               0               0               0
22               0               0               0               0
23               0               0               0               0
24               0               0               0               0
25               0               0               0               0
26               0               0               0               0
27               0               0               0               0
28               0               0               0               0
29               0               0               0               0
30               0               0               0               0
31               0               0               0               0
32               0               0               0               0
33               0               0               0               0
34               0               0               0               1
35               0               0               0               1
36               0               0               0               0
37               0               0               0               0
38               1               0               0               0
39               1               0               0               0
40               0               1               0               0
41               0               0               1               0
42               0               0               0               0
43               0               0               0               0
44               0               0               0               0
45               0               0               0               0
46               0               0               0               0
attr(,"assign")
[1] 1 1 1 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
# estimateDisp--------------------------------------------------------
y <- estimateDisp(y, design, robust=TRUE)
# plotBCV, width="3.8in", fig.cap="Scatterplot of the biological coefficient of variation (BCV) against the average abundance of each gene. The plot shows the square-root estimates of the common, trended and tagwise NB dispersions."----
plotBCV(y)

# glmQLFit------------------------------------------------------------
fit <- glmQLFit(y, design, robust=TRUE)
head(fit$coefficients)
                   CD.2weeks.Sed.C CD.4weeks.Sed.C CD.6weeks.Sed.C
ENSMUSG00000000001          -10.16          -10.24          -10.09
ENSMUSG00000000028          -13.57          -13.42          -13.71
ENSMUSG00000000037          -13.28          -13.75          -13.06
ENSMUSG00000000056           -9.98           -9.92           -9.99
ENSMUSG00000000058          -10.09          -10.22           -9.87
ENSMUSG00000000078           -9.78           -9.83           -9.68
                   CD.8weeks.Sed.C WD.2weeks.Sed.C WD.4weeks.Sed.C
ENSMUSG00000000001          -10.06          -10.17          -10.17
ENSMUSG00000000028          -13.40          -13.55          -13.48
ENSMUSG00000000037          -13.22          -13.28          -12.88
ENSMUSG00000000056           -9.85           -9.97          -10.05
ENSMUSG00000000058           -9.95          -10.01          -10.18
ENSMUSG00000000078           -9.73           -9.73           -9.82
                   WD.6weeks.Sed.C WD.8weeks.Sed.C
ENSMUSG00000000001          -10.05          -10.12
ENSMUSG00000000028          -13.58          -13.45
ENSMUSG00000000037          -13.35          -12.98
ENSMUSG00000000056           -9.95           -9.87
ENSMUSG00000000058           -9.85           -9.93
ENSMUSG00000000078           -9.61           -9.67
# QLDisp, out.width="3.8in", fig.cap="A plot of the quarter-root QL dispersion against the average abundance of each gene. Estimates are shown for the raw (before EB moderation), trended and squeezed (after EB moderation) dispersions. Note that the QL dispersions and trend shown here are relative to the NB dispersion trend shown in Figure~\ref{fig:plotBCV}."----
plotQLDisp(fit)

# df.prior------------------------------------------------------------
summary(fit$df.prior)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   3.57    3.57    3.64    3.64    3.70    3.70 

Quality Control 2:

  1. pearson correlation among samples
  2. MDS plot
  3. Principle Component Analysis
# pearson correlation
cpm <- cpm(y, normalized.lib.size=T) 
cpm.cor <- cor(cpm, method = "pearson")
hist(cpm.cor)

library(corrplot)
corrplot(cpm.cor, method="square", order="hclust", cl.lim = c(0.85, 1),  tl.col="purple", tl.cex = 0.75, type = "full", is.corr = FALSE)
colorlegend(colbar = grey(1:100 / 100), 1:10, col = "red", align = "l",
            xlim = c(0, 6), ylim = c(-0.5,-0.1), vertical = FALSE)

# MDS plot
  # no label version
par(xpd = T, mar = par()$mar + c(0,0,0,7))  # to make legends outside of the plot
colors <- rep(c("darkgreen", "red", "blue", "purple"), 2)
pch <- c(0,1,2,5,15,16,17,18)
plotMDS(y, top = 500, cex = 1, pch=pch[group], dim.plot = c(1,2), ndim = 2, gene.selection = "pairwise", col=colors[group]) # col = as.numeric(group) differentiate colors between groups
legend(1.2, 0.7,levels(group), pch=pch, col=colors)

par(mar=c(5, 4, 4, 2.3) + 0.1)

  # with label version, to identify outliers
par(xpd = T, mar = par()$mar + c(0,0,0,7))  # to make legends outside of the plot
colors <- rep(c("darkgreen", "red", "blue", "purple"), 2)
  # pch <- c(0,1,2,5,15,16,17,18)
plotMDS(y, top = 500, cex = 1, dim.plot = c(1,2), ndim = 2, gene.selection = "pairwise", col=colors[group]) # col = as.numeric(group) differentiate colors between groups
legend(1.2, 0.7,levels(group), pch=pch, col=colors)

par(mar=c(5, 4, 4, 2.3) + 0.1)

## ---- Pincicpal component analysis ---
logCPM.PCA<-log.cpm 
rownames(logCPM.PCA) <- y$genes$Symbol
#colnames(logCPM) <- paste(y$samples$group, 1:2, sep="-")
colnames(logCPM.PCA) <- data.design$sample_short # get it into the y project

pca_original = prcomp(t(logCPM.PCA),scale=T, center=T)
pca_x <- pca_original$x
pca_table <- data.frame(pca_x, data.design)
x <- pca_original$sdev^2/sum(pca_original$sdev^2) # Proportion of Variance Explained for all components

## Scree plot
plot(x, xlab="Principal Component", ylab="Proportion of Variance Explained", type="b")

plot(cumsum(x), xlab="Principal Component", ylab="Cumulative Proportion of Variance Explained", type="b")

## PCA plot 
library(ggplot2)
PCA_plot <- function(pca_table, PC_x, PC_y, color, shape){
  #PC_x,PC_y are type of interger
  #color, shape, are type of string
  g <- ggplot(pca_table, aes_string(x=names(pca_table[PC_x]), y=names(pca_table[PC_y]), color=color, shape=shape)) 
  g <- g + geom_point(alpha=0.7, size=3) 
  # g <- g + labs(color = "Group", shape="Tissue")
  g + labs(x = paste(names(pca_table[PC_x]), scales::percent(x[PC_x]),"variance explained", sep=" "), y=paste(names(pca_table[PC_y]), scales::percent(x[PC_y]),"variance explained", sep=" "))
  #filename <- paste()
  #ggsave(filename, width=7, height=7, units="in")
}

PCA_plot(pca_table, 1, 2, "TERM_1", "TERM_2")

PCA_plot(pca_table, 2, 3, "TERM_1", "TERM_2")

Data Manipulation for GLM

# transpose cpm, observation (row) is sample, variable (column) is each gene. 
cpm <- cpm(y, normalized.lib.size=T) 
cpm.t<-t(cpm)
colnames(cpm)
 [1] "WD.2weeks.Sed.C_17963C" "WD.2weeks.Sed.C_17964C"
 [3] "WD.2weeks.Sed.C_17966C" "WD.2weeks.Sed.C_17967C"
 [5] "WD.4weeks.Sed.C_17969C" "WD.4weeks.Sed.C_17970C"
 [7] "WD.4weeks.Sed.C_17971C" "WD.4weeks.Sed.C_17972C"
 [9] "WD.4weeks.Sed.C_17973C" "WD.6weeks.Sed.C_17974C"
[11] "WD.6weeks.Sed.C_17976C" "WD.6weeks.Sed.C_17977C"
[13] "WD.6weeks.Sed.C_17978C" "WD.6weeks.Sed.C_17979C"
[15] "WD.8weeks.Sed.C_17980C" "WD.8weeks.Sed.C_17981C"
[17] "WD.8weeks.Sed.C_17982C" "WD.8weeks.Sed.C_17985C"
[19] "CD.2weeks.Sed.C_17986C" "CD.2weeks.Sed.C_17987C"
[21] "CD.2weeks.Sed.C_17990C" "CD.2weeks.Sed.C_17991C"
[23] "CD.4weeks.Sed.C_17992C" "CD.4weeks.Sed.C_17993C"
[25] "CD.4weeks.Sed.C_17995C" "CD.4weeks.Sed.C_17996C"
[27] "CD.4weeks.Sed.C_17997C" "CD.6weeks.Sed.C_17999C"
[29] "CD.6weeks.Sed.C_18000C" "CD.6weeks.Sed.C_18602C"
[31] "CD.6weeks.Sed.C_18603C" "CD.8weeks.Sed.C_18605C"
[33] "CD.8weeks.Sed.C_18606C" "WD.8weeks.Sed.C_18624C"
[35] "WD.8weeks.Sed.C_18625C" "CD.8weeks.Sed.C_18626C"
[37] "CD.8weeks.Sed.C_18627C" "WD.2weeks.Sed.C_18673C"
[39] "WD.2weeks.Sed.C_18674C" "WD.4weeks.Sed.C_18675C"
[41] "WD.6weeks.Sed.C_18676C" "CD.2weeks.Sed.C_18677C"
[43] "CD.4weeks.Sed.C_18678C" "CD.6weeks.Sed.C_18679C"
[45] "CD.6weeks.Sed.C_18680C" "CD.8weeks.Sed.C_18681C"
all(data.design$sample_short == rownames(cpm.t))
[1] TRUE
# add design file to transposed cpm, data manipulation using filter based on design file TERMs (TERM_1 - TERM_4)
cpm.t.meta <- cbind(cpm.t, data.design)
# select all the WD and Sed group
cpm.WD.t <-cpm.t.meta %>% filter(TERM_1 == "WD", TERM_3 == "Sed") 
# remove metadata and transpose table back, rows = genes, columns = samples
gene_number <- dim(cpm)[1]
gene_number
[1] 14329
cpm.WD <- cpm.WD.t[, c(1: gene_number)] %>% t()  
# check whether metadata (which contains characters has been removed, values should all be doulbe type now)
all(map_lgl(cpm.WD, is.double))
[1] TRUE
# assign sample_name_short it to the colnames after transposition.
colnames(cpm.WD) <- as.character(cpm.WD.t$sample_short)

# select all the CD and Sed group, and calculate the mean of each group based on weeks
cpm.ave.CD.meta <- cpm.t.meta %>% filter(TERM_1 == "CD") %>% group_by(TERM_2) %>% summarise_all(mean)
# check the column boundray of cpm
cpm.ave.CD.meta[,1:2]
cpm.ave.CD.meta[,(gene_number+1):(gene_number+2)]
# remove meta column and transform
cpm.ave.CD <- cpm.ave.CD.meta[, 2:(gene_number+1)] %>% t() 
colnames(cpm.ave.CD) <- cpm.ave.CD.meta[,1] %>% unlist() %>% paste("CD.ave.", ., sep="")
all(map_lgl(cpm.ave.CD, is.double))
[1] TRUE
# The following is to subtract the mean of CD from WD of each week, and glm model the difference between WD and CD group
dim(cpm.WD)
[1] 14329    24
dim(cpm.ave.CD)
[1] 14329     4
cpm.clean <- data.frame(cpm.WD, cpm.ave.CD) ## column 1:14395 are WD count read, 14396:14399 are CD count read.
dim(cpm.clean)
[1] 14329    28
head(cpm.clean)
# WD-CD.ave, result will be combined into D_all value
D_2wk <- cpm.clean %>% select(contains("2weeks")) %>% mutate_all(funs(Dif=.- CD.ave.2weeks)) %>% select(contains("C_Dif"))
D_4wk <- cpm.clean %>% select(contains("4weeks")) %>% mutate_all(funs(Dif=.- CD.ave.4weeks)) %>% select(contains("C_Dif"))
D_6wk <- cpm.clean %>% select(contains("6weeks")) %>% mutate_all(funs(Dif=.- CD.ave.6weeks)) %>% select(contains("C_Dif"))
D_8wk <- cpm.clean %>% select(contains("8weeks")) %>% mutate_all(funs(Dif=.- CD.ave.8weeks)) %>% select(contains("C_Dif"))

D_all <- data.frame(D_2wk, D_4wk, D_6wk, D_8wk) 
rownames(D_all) <- rownames(cpm.ave.CD)
head(D_all)

Generalized linear mGLM

z ~ u + duration, z= y(WD)-y(CD)ave

  1. Model the difference of WD and CD by duration on diet
  2. The difference of WD and CD is calculated by subtracting the average of CD from each WD
  • duration: 2wks, 4wks, 6wks, 8wks
## ---- GLM, z ~ u + duration, z= y(WD)-y(CD)
# use durataion as predictor, duration matches the column of D_all
duration <- c(rep("2weeks", 6), rep("4weeks", 6), rep("6weeks", 6), rep("8weeks", 6))

# for each gene, generate GLM with duration as prediction, collect the summary of the model for each gene
glm.fit <- D_all %>% t() %>% as.data.frame() %>%
  map(~ lm(. ~  duration)) %>% 
  map(summary)

colnames(D_all)
 [1] "WD.2weeks.Sed.C_17963C_Dif" "WD.2weeks.Sed.C_17964C_Dif"
 [3] "WD.2weeks.Sed.C_17966C_Dif" "WD.2weeks.Sed.C_17967C_Dif"
 [5] "WD.2weeks.Sed.C_18673C_Dif" "WD.2weeks.Sed.C_18674C_Dif"
 [7] "WD.4weeks.Sed.C_17969C_Dif" "WD.4weeks.Sed.C_17970C_Dif"
 [9] "WD.4weeks.Sed.C_17971C_Dif" "WD.4weeks.Sed.C_17972C_Dif"
[11] "WD.4weeks.Sed.C_17973C_Dif" "WD.4weeks.Sed.C_18675C_Dif"
[13] "WD.6weeks.Sed.C_17974C_Dif" "WD.6weeks.Sed.C_17976C_Dif"
[15] "WD.6weeks.Sed.C_17977C_Dif" "WD.6weeks.Sed.C_17978C_Dif"
[17] "WD.6weeks.Sed.C_17979C_Dif" "WD.6weeks.Sed.C_18676C_Dif"
[19] "WD.8weeks.Sed.C_17980C_Dif" "WD.8weeks.Sed.C_17981C_Dif"
[21] "WD.8weeks.Sed.C_17982C_Dif" "WD.8weeks.Sed.C_17985C_Dif"
[23] "WD.8weeks.Sed.C_18624C_Dif" "WD.8weeks.Sed.C_18625C_Dif"
glm.fit %>% names() %>% head()
[1] "ENSMUSG00000000001" "ENSMUSG00000000028" "ENSMUSG00000000037"
[4] "ENSMUSG00000000056" "ENSMUSG00000000058" "ENSMUSG00000000078"
# look at two examples "Apoe", "Pecam1" in glm model
y$genes[y$genes$Symbol=="Apoe",]
glm.fit[["ENSMUSG00000002985"]]

Call:
lm(formula = . ~ duration)

Residuals:
    Min      1Q  Median      3Q     Max 
-139.56  -84.47    7.05   49.36  284.98 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)
(Intercept)       -18.7       45.4   -0.41     0.68
duration4weeks     34.1       64.1    0.53     0.60
duration6weeks     62.7       64.1    0.98     0.34
duration8weeks     66.7       64.1    1.04     0.31

Residual standard error: 111 on 20 degrees of freedom
Multiple R-squared:  0.0649,    Adjusted R-squared:  -0.0753 
F-statistic: 0.463 on 3 and 20 DF,  p-value: 0.711
y$genes[y$genes$Symbol=="Pecam1",]
glm.fit[["ENSMUSG00000020717"]]

Call:
lm(formula = . ~ duration)

Residuals:
   Min     1Q Median     3Q    Max 
-5.461 -1.967 -0.402  2.049  6.151 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)
(Intercept)      1.2814     1.3058    0.98     0.34
duration4weeks   0.0896     1.8466    0.05     0.96
duration6weeks  -0.3399     1.8466   -0.18     0.86
duration8weeks   0.0125     1.8466    0.01     0.99

Residual standard error: 3.2 on 20 degrees of freedom
Multiple R-squared:  0.0032,    Adjusted R-squared:  -0.146 
F-statistic: 0.0214 on 3 and 20 DF,  p-value: 0.996

Obtain the statistics from GLM

# Use home-made pipeline stat_table_all to: 
source("~/Documents/code/01_function/my_GLM.R") 
# 1. retrieve the R-square(r2) and the corrected p value (q) from the fit summary of each gene
# 2. filter genes based on qval<0.05, r2>0.5, arrange based on q (ascending).
# 3. from filtered genes, retrieve coefficient of estimate and p val for each gene for down stream analysis.
stat_table_all <- stat_table(glm.fit)
dim(stat_table_all)
[1] 906  12
head(stat_table_all)
# add the gene names to stat_table_all 
stat_table_all<-left_join(stat_table_all, y$genes, by=c("ENSMUSG_ID"="genes"))
dim(stat_table_all)
[1] 906  15
head(stat_table_all)
# export the gene list table for Kegg and GO search
# write.table(stat_table_all, paste(out_analysis, out_prefix, "_gene_list.txt", sep=""), sep="\t", row=F, quote=F)

# export the background gene list for Kegg and GO search, subtract genes of stat_table_all from y$genes
bg_gene_list <- y$genes 
dim(bg_gene_list)
[1] 14329     4
#write.table(bg_gene_list, paste(out_analysis, out_prefix, "_bg_gene.txt", sep=""), sep="\t", row=F, quote=F)

Explore the GLM model

  1. PCA of the coefficient matrix (row: genes; colums: coefficients)
  2. heatmap of the coefficient matrix
  3. line plot of all the coefficients based on each gene
  4. VennDiagram plotting of genes and overlap of genes that are significant changed under each coefficient catagory
  5. Tile plot showing the pattern of coeffiecint
# principal component analysis of estimate of the coefficient
stat_est <- stat_table_all %>% select(contains("est"))
head(stat_est)
pca_original = prcomp(t(stat_est),scale=T, center=T)
pca_x <- data.frame(pca_original$x, estimate = rownames(pca_original$x))
pca_table <- pca_x
x <- pca_original$sdev^2/sum(pca_original$sdev^2) # Proportion of Variance Explained for all components

# Scree plot
plot(x, xlab="Principal Component", ylab="Proportion of Variance Explained", type="b")

plot(cumsum(x), xlab="Principal Component", ylab="Cumulative Proportion of Variance Explained", type="b")

# PCA plot
g <- ggplot(pca_table, aes(x=pca_table[1], y=pca_table[2], color= estimate))
  g <- g + geom_point(alpha=0.7, size=3) 
  # g <- g + labs(color = "Group", shape="Tissue")
  g + labs(x = paste(names(pca_table[1]), scales::percent(x[1]),"variance explained", sep=" "), y=paste(names(pca_table[2]), scales::percent(x[2]),"variance explained", sep=" "))

# hirachical analysis of estimate of the coefficient
stat_est_h<- t(scale(t(stat_est))) %>% as.data.frame() %>% as.matrix()
  
library(gplots)
col.pan <- colorpanel(100, "blue", "white", "red")
heatmap.2(stat_est_h, col=col.pan, Rowv=TRUE, scale="none", 
          trace="none", dendrogram="both", cexRow=0.5, cexCol=0.7, density.info="none",
          margin=c(9,4), lhei=c(2,10), lwid=c(2,6))  

# plot the estimate of coefficient of each gene
stat_table_est.gather <- stat_table_all %>% 
  select("ENSMUSG_ID", contains("_est")) %>% 
  gather(key=coeff_group, value=estimate, -ENSMUSG_ID)
str(stat_table_est.gather)
'data.frame':   3624 obs. of  3 variables:
 $ ENSMUSG_ID : chr  "ENSMUSG00000020178" "ENSMUSG00000038805" "ENSMUSG00000068696" "ENSMUSG00000071234" ...
 $ coeff_group: chr  "X.Intercept._est" "X.Intercept._est" "X.Intercept._est" "X.Intercept._est" ...
 $ estimate   : num  0.373 0.385 7.494 0.56 0.225 ...
stat_table_est.gather$coeff_group <- factor(stat_table_est.gather$coeff_group,
                                            levels =c("X.Intercept._est", "duration4weeks_est", "duration6weeks_est","duration8weeks_est"),
                                            ordered = TRUE)
ggplot(stat_table_est.gather, aes(x=coeff_group,y=estimate, group=ENSMUSG_ID)) + 
  geom_line(alpha=0.1, color="red") 

ggplot(stat_table_est.gather, aes(x=coeff_group,y=estimate, group=ENSMUSG_ID))+ 
  geom_line(alpha=0.1, color="red") + scale_y_continuous(limits= c(-50, 75))

# To identify each gene’s critical predictor. binarized the estimate and significance matrices by hard cutoffs.
stat_pval <- stat_table_all %>% select(contains("_pval"))
head(stat_pval)
head(stat_est)
stat_binary <- (apply(stat_pval, 2, function(x) x<0.05) & apply(stat_est, 2, function(x) abs(x)>0.2)) %>% as.data.frame()
rowSums(stat_binary) %>% table()
.
  0   1   2   3   4 
  1 435 334  96  40 
# summary of the numbers of genes passes the filter
apply(stat_binary, 2, table)
      X.Intercept._pval duration4weeks_pval duration6weeks_pval
FALSE               731                 373                 295
TRUE                175                 533                 611
      duration8weeks_pval
FALSE                 674
TRUE                  232
# sepertate out genes by their coefficients
col <- as.list(names(stat_binary))
genes_by_coeff <- map(col, ~ stat_table_all[["Symbol"]][stat_binary[[.]]])
names(genes_by_coeff) <- c("Intercept", "duration4wks", "duration6wks", "duration8wks")
glimpse(genes_by_coeff)
List of 4
 $ Intercept   : chr [1:175] "Six3" "Gpr88" "Drd2" "Gpr6" ...
 $ duration4wks: chr [1:533] "Gpr88" "Syndig1l" "Tacr1" "Ptpn9" ...
 $ duration6wks: chr [1:611] "Adora2a" "Six3" "Gpr88" "Syndig1l" ...
 $ duration8wks: chr [1:232] "Gpr88" "Ido1" "Adck2" "Fos" ...
# to check the overlap of genes in different coefficient catagories
library(VennDiagram)
grid.newpage()
within(genes_by_coeff,
       draw.quad.venn(area1 = length(Intercept), 
                      area2 = length(duration4wks), 
                      area3 = length(duration6wks), 
                      area4 = length(duration8wks),
                      n12 = length(intersect(Intercept, duration4wks)), 
                      n13 = length(intersect(Intercept, duration6wks)),
                      n14 = length(intersect(Intercept, duration8wks)),
                      n23 = length(intersect(duration4wks, duration6wks)),
                      n24 = length(intersect(duration4wks, duration8wks)),
                      n34 = length(intersect(duration6wks, duration8wks)),
                      n123 = Intercept %>% intersect(duration4wks) %>% intersect(duration6wks) %>% length(),
                      n124 = Intercept %>% intersect(duration4wks) %>% intersect(duration8wks) %>% length(),
                      n134 = Intercept %>% intersect(duration6wks) %>% intersect(duration8wks) %>% length(),
                      n234 = duration4wks %>% intersect(duration6wks) %>% intersect(duration8wks) %>% length(),
                      n1234 = Intercept %>% intersect(duration4wks) %>% intersect(duration6wks) %>% intersect(duration8wks) %>% length(),
                      category = names(genes_by_coeff),
                      fill = c("orange", "red", "green", "blue"))
        )

$Intercept
  [1] "Six3"          "Gpr88"         "Drd2"          "Gpr6"         
  [5] "Slc35d3"       "Klhl13"        "Ldlr"          "Nexn"         
  [9] "4930432K21Rik" "Rasgrp2"       "Tmem200a"      "Hrh1"         
 [13] "Shq1"          "Scn5a"         "Ofd1"          "Ybey"         
 [17] "3632451O06Rik" "Fam177a"       "Lpcat2"        "Gpatch11"     
 [21] "Fam111a"       "Mtrf1"         "Nectin4"       "Phf7"         
 [25] "Lrrc49"        "Diexf"         "Micall2"       "Arhgap22"     
 [29] "Insig1"        "Mme"           "Arl5c"         "C77080"       
 [33] "Bcl6b"         "Ppfibp2"       "Zfp804a"       "Nr1d1"        
 [37] "Ptcd1"         "Anapc4"        "Ddx27"         "Slc22a3"      
 [41] "Dtwd2"         "Pik3c3"        "Vipr1"         "Gm5083"       
 [45] "Rars2"         "Tcerg1l"       "E130102H24Rik" "Zfp930"       
 [49] "Ankra2"        "Nanos2"        "Gtf2h1"        "B3gnt9"       
 [53] "Ttc34"         "Tada1"         "Ccdc183"       "Pex13"        
 [57] "Samd3"         "Fam53b"        "Tigd2"         "Herpud2"      
 [61] "Rasd1"         "Trim17"        "Rbm43"         "Pcsk9"        
 [65] "Mta2"          "Dbr1"          "Dhx32"         "Rac2"         
 [69] "4930513N10Rik" "Gpt"           "Cog6"          "Pum3"         
 [73] "Runx2"         "Nrap"          "Nub1"          "Gart"         
 [77] "Wrb"           "Sdhaf3"        "Zfp791"        "Zwilch"       
 [81] "Mustn1"        "Ubxn4"         "Bbs9"          "Smad7"        
 [85] "Zkscan6"       "Oxnad1"        "Glyctk"        "Rassf6"       
 [89] "Arhgap27"      "Nat8f5"        "Dusp7"         "St7l"         
 [93] "Pced1b"        "Cdkal1"        "2700046G09Rik" "Ufd1"         
 [97] "Il21r"         "Card19"        "Eif4e2"        "Gemin8"       
[101] "Gm11696"       "6430550D23Rik" "Fra10ac1"      "Pdhx"         
[105] "Cbx8"          "Iqcc"          "Kcna5"         "Znrf1"        
[109] "P4ha3"         "Hspa9"         "Rabgef1"       "Slc52a3"      
[113] "Zdhhc15"       "2310057M21Rik" "Cys1"          "Hvcn1"        
[117] "Cyb5d2"        "Iba57"         "Arntl"         "Tpm1"         
[121] "Top1"          "Otx1"          "Trim26"        "Agk"          
[125] "Meltf"         "Sh3yl1"        "Rerg"          "Zbed4"        
[129] "Rasip1"        "Kcne1l"        "Lfng"          "Met"          
[133] "Hpdl"          "Klhdc1"        "Mettl25"       "Tecpr1"       
[137] "Snx2"          "Ccdc181"       "Ccl6"          "Cmtm3"        
[141] "Thtpa"         "Adprm"         "BC030500"      "Zc3h8"        
[145] "Sox18"         "Slc9a9"        "Mob3c"         "Atpaf1"       
[149] "Slc35g2"       "Wdr92"         "Cbwd1"         "Hic2"         
[153] "Isg15"         "6330409D20Rik" "Ucp3"          "Mettl2"       
[157] "Atg14"         "C530005A16Rik" "1810010H24Rik" "Sema5b"       
[161] "Aggf1"         "Cutc"          "LOC100861615"  "Ephb3"        
[165] "Ubc"           "Araf"          "Plrg1"         "Aspa"         
[169] "Zfp7"          "Zfp202"        "Cluap1"        "Inip"         
[173] "Lhx2"          "Tgfa"          "Ttc26"        

$duration4wks
  [1] "Gpr88"         "Syndig1l"      "Tacr1"         "Ptpn9"        
  [5] "Adck2"         "Inhba"         "Rxrg"          "Trib2"        
  [9] "Banp"          "Oprk1"         "Fos"           "Sh2d3c"       
 [13] "Ldlr"          "4930432K21Rik" "Nuak1"         "Spred2"       
 [17] "Gfod1"         "Smad3"         "Cln6"          "Cwc25"        
 [21] "Cdyl2"         "Naglu"         "Lrrc61"        "Tmem200a"     
 [25] "Casp7"         "Dll4"          "Rilpl1"        "Dusp4"        
 [29] "Junb"          "Mccc2"         "Schip1"        "Dnajc21"      
 [33] "Ier2"          "Spata5"        "Hrh1"          "Shq1"         
 [37] "Erf"           "Rrad"          "Scn5a"         "Ofd1"         
 [41] "Cbfa2t3"       "Ybey"          "St8sia5"       "Heyl"         
 [45] "Mical2"        "Ccdc112"       "Mamld1"        "3632451O06Rik"
 [49] "Sgsm1"         "Myadml2"       "Zfp459"        "Fam177a"      
 [53] "Fmnl1"         "Tdp1"          "Pfkfb3"        "Bhlhe40"      
 [57] "Hyi"           "Zfand3"        "Sco1"          "Cc2d1b"       
 [61] "Lrfn2"         "Lig4"          "Lpcat2"        "Mfap3"        
 [65] "Pdp1"          "Cd180"         "Cnot4"         "Gpatch11"     
 [69] "Fam111a"       "Slc10a7"       "Plcl2"         "S1pr2"        
 [73] "Smg9"          "Bdnf"          "B4galt4"       "Mtrf1"        
 [77] "Heatr3"        "Trpm4"         "Mark3"         "Nfatc2"       
 [81] "Ngfr"          "Nectin4"       "Htr1b"         "Phf7"         
 [85] "Lrrc49"        "Crem"          "Ipmk"          "Zfyve1"       
 [89] "Glrx2"         "4930453N24Rik" "Ccnf"          "Trim68"       
 [93] "Micall2"       "Arhgap22"      "Blzf1"         "Insig1"       
 [97] "Dusp16"        "Arl5c"         "Zfand4"        "Tefm"         
[101] "Jun"           "Bcl6b"         "Slc25a35"      "Gtf2ird1"     
[105] "Fam212b"       "Mex3d"         "A930017K11Rik" "Ccdc6"        
[109] "Plekha3"       "Ppfibp2"       "Phf13"         "Grip1"        
[113] "Ilvbl"         "Armc5"         "E2f6"          "Fam222a"      
[117] "Synpo"         "Htr6"          "Ciart"         "Ptcd1"        
[121] "Frmd3"         "Anapc4"        "Zbtb12"        "Lhfp"         
[125] "Actr5"         "Zmynd8"        "Slc22a3"       "Hunk"         
[129] "Lmcd1"         "Gpr83"         "Zscan22"       "Pms1"         
[133] "Dusp8"         "Chaf1a"        "Hrh2"          "Plcd1"        
[137] "Mmp24"         "Mycn"          "Gpr1"          "Pik3c3"       
[141] "Thap1"         "Gm20878"       "Ankrd33b"      "Snap29"       
[145] "Layn"          "Gm5083"        "Ddx50"         "Pla2g4e"      
[149] "Zfp366"        "Prkag2"        "Abcb1b"        "Tcerg1l"      
[153] "Zfp930"        "Ppm1d"         "Nanos2"        "Rassf1"       
[157] "Gtf2h1"        "Tfdp1"         "Pou6f2"        "Cacng4"       
[161] "Cables1"       "Gpr68"         "Uhrf2"         "B3gnt9"       
[165] "A430108G06Rik" "Map3k21"       "LOC100862446"  "Gpr137b"      
[169] "Homer1"        "Tada1"         "Fam117a"       "Rasl11b"      
[173] "Cbln2"         "Ccdc183"       "Hist1h2ao"     "Narf"         
[177] "P4ha1"         "Cnep1r1"       "Pex13"         "Abhd6"        
[181] "Ubox5"         "Irs2"          "4931440F15Rik" "Nck1"         
[185] "Kbtbd2"        "Bend3"         "2410002F23Rik" "Tigd2"        
[189] "Mapk14"        "Zbtb40"        "Camk1g"        "Rhobtb2"      
[193] "Lsm11"         "Adam19"        "Ephb2"         "St7"          
[197] "Rhob"          "1700029J07Rik" "Rnd3"          "Tgfb3"        
[201] "Lrrc29"        "Pop1"          "Herpud2"       "Arhgef7"      
[205] "Rasd1"         "Rtel1"         "Prdm4"         "Mettl3"       
[209] "Kcnf1"         "Wisp1"         "Tiparp"        "Lss"          
[213] "Dvl2"          "Rbm43"         "Wdcp"          "Cd300c2"      
[217] "Jade2"         "Prickle1"      "Dhx32"         "Abcc10"       
[221] "Slc35f3"       "4930513N10Rik" "Dusp14"        "Sgms1"        
[225] "Pafah2"        "Plekho2"       "Serinc2"       "Cog6"         
[229] "Pum3"          "Sema4c"        "Chadl"         "Hmox1"        
[233] "Brix1"         "Apold1"        "Dars"          "Sf3a3"        
[237] "Cpne2"         "Fbn1"          "Usp18"         "Scpep1"       
[241] "Dnttip2"       "Cd302"         "Adamts1"       "Zdhhc5"       
[245] "Foxred1"       "Ubxn10"        "Xbp1"          "Cited2"       
[249] "Ltv1"          "Bcor"          "Fam171a1"      "Nrap"         
[253] "Nub1"          "Rad9b"         "Gart"          "Zfp438"       
[257] "2210408I21Rik" "Wrb"           "Nr4a1"         "Zfp180"       
[261] "Mtmr4"         "Ppp2r5a"       "Nop2"          "Sdhaf3"       
[265] "Sh2d5"         "Galns"         "Zfp791"        "Arhgef3"      
[269] "Hps1"          "Calhm2"        "D630045J12Rik" "Zwilch"       
[273] "Mustn1"        "Zswim4"        "Hspa12b"       "Egr2"         
[277] "Rfwd2"         "Zfp316"        "Slfn2"         "Rnf4"         
[281] "Ubxn4"         "Usp43"         "Hexim2"        "Ppp1r13l"     
[285] "Erp44"         "Man2b2"        "Ccl9"          "Smad7"        
[289] "Vegfc"         "Ric8b"         "Cfap70"        "Tmem220"      
[293] "Aagab"         "F2rl2"         "Exosc3"        "Gspt2"        
[297] "Sh3rf3"        "Etv5"          "Xylb"          "Ldlrad3"      
[301] "Glyctk"        "Srp68"         "Snhg5"         "Actr8"        
[305] "Alox12b"       "Arhgap27"      "Nat8f5"        "Nol10"        
[309] "Crnkl1"        "Smu1"          "Nkrf"          "Nxpe4"        
[313] "Tmem100"       "St7l"          "Pgm1"          "Mak16"        
[317] "Atp8b2"        "Cep131"        "Wdsub1"        "Nacad"        
[321] "Sord"          "Golt1b"        "Pced1b"        "Tmem121b"     
[325] "Cdkal1"        "Zfp790"        "Tiam2"         "Adcyap1"      
[329] "Etv6"          "Creb5"         "Fstl3"         "2700046G09Rik"
[333] "Ufd1"          "Adra1d"        "Hnrnpf"        "Naa35"        
[337] "Smg8"          "Naa25"         "Rundc1"        "Ndufaf4"      
[341] "Ints5"         "Tlr9"          "Pdyn"          "Tox4"         
[345] "Card19"        "Eif4e2"        "Srpk3"         "Ythdf2"       
[349] "Gldn"          "Mycl"          "6430550D23Rik" "Ido2"         
[353] "Mybpc1"        "Rgs11"         "Irak1"         "Rrs1"         
[357] "Iqgap2"        "Il17ra"        "E2f4"          "Pdhx"         
[361] "Cbx8"          "Cyr61"         "Zcwpw1"        "Atp8b1"       
[365] "Kcna5"         "Mus81"         "Plpbp"         "Fam46a"       
[369] "Olig2"         "Slc2a9"        "P4ha3"         "Hspa9"        
[373] "Rabgef1"       "Ggnbp2"        "Slc52a3"       "Zdhhc15"      
[377] "2310057M21Rik" "Stx3"          "Tspyl2"        "Asb4"         
[381] "Cys1"          "Hvcn1"         "Top3a"         "Smad2"        
[385] "Rhbdd1"        "Cercam"        "Zfp687"        "Axin1"        
[389] "Uck2"          "Krcc1"         "Ddx10"         "Idua"         
[393] "Trim16"        "Mis12"         "Lrrtm2"        "Iba57"        
[397] "Arntl"         "Fancc"         "Slc9a8"        "Ldlrap1"      
[401] "Neurl2"        "Tpm1"          "Egr3"          "Pdzrn4"       
[405] "Rtn4rl2"       "Top1"          "Otx1"          "Rassf5"       
[409] "Agk"           "Rad17"         "Meltf"         "Sh3yl1"       
[413] "1700034H15Rik" "Rin1"          "Surf6"         "Srarp"        
[417] "Kcnip3"        "Rasip1"        "Lfng"          "Loxl3"        
[421] "Ddr1"          "Zufsp"         "Plaa"          "Dhx16"        
[425] "Met"           "Fam84a"        "Inafm2"        "Jdp2"         
[429] "Ercc8"         "Hlcs"          "Sgsh"          "4732491K20Rik"
[433] "Nfe2l3"        "Mpp3"          "Eed"           "Mettl25"      
[437] "Oas1c"         "Wdr91"         "Tecpr1"        "Mphosph10"    
[441] "Mthfr"         "Zfp874b"       "Snx2"          "Tob2"         
[445] "Ulk3"          "Med26"         "Gtpbp4"        "Gramd1b"      
[449] "Zfp90"         "Akap7"         "Dhx8"          "Sema6c"       
[453] "Serinc3"       "Ccl6"          "Prpf3"         "Fkbp5"        
[457] "Thtpa"         "Cdyl"          "Trim9"         "Fam110b"      
[461] "BC030500"      "Ints8"         "Mob3c"         "Lrrc57"       
[465] "Atpaf1"        "Slc35g2"       "B230206H07Rik" "Klhl8"        
[469] "Wdr92"         "Cbwd1"         "Rpusd2"        "Zranb1"       
[473] "Isg15"         "Unc45a"        "Epha2"         "Irf8"         
[477] "Moap1"         "6330409D20Rik" "Tmem109"       "Tbx2"         
[481] "Ucp3"          "Clec4a2"       "Cdkn1a"        "B4galt5"      
[485] "Atg14"         "Fam155a"       "Kansl2"        "1810010H24Rik"
[489] "Rars"          "D17H6S53E"     "Cyb5rl"        "Sema5b"       
[493] "Mrpl50"        "Arf6"          "Aggf1"         "Gimap1"       
[497] "Mboat1"        "Cnot9"         "Rrm2"          "Gdpd5"        
[501] "BC034090"      "Rbm19"         "Srfbp1"        "Sowahb"       
[505] "Araf"          "Plrg1"         "Mcam"          "Aspa"         
[509] "Zfp7"          "Synj2"         "Pced1a"        "Sec23ip"      
[513] "Cluap1"        "Zbtb17"        "Scg2"          "Gm960"        
[517] "Gm11549"       "Fndc8"         "Inip"          "Arsg"         
[521] "Chsy1"         "Tbcd"          "Tmem8"         "Gimap6"       
[525] "Tmem200b"      "Spidr"         "Tgfa"          "Ttc26"        
[529] "Cactin"        "Nup43"         "Pcsk1"         "Rnpepl1"      
[533] "Car14"        

$duration6wks
  [1] "Adora2a"       "Six3"          "Gpr88"         "Syndig1l"     
  [5] "Cd4"           "Chat"          "Isl1"          "Penk"         
  [9] "Trh"           "Drd2"          "Gpr6"          "Sh3rf2"       
 [13] "Rgs9"          "Tacr1"         "Htr1d"         "Slc35d3"      
 [17] "Lrrc10b"       "Drd1"          "Slc5a7"        "Slc10a4"      
 [21] "Ido1"          "Ptpn9"         "Ppp1r1b"       "Klhl13"       
 [25] "Adck2"         "Inhba"         "Pde1b"         "Rxrg"         
 [29] "Trib2"         "Banp"          "Oprk1"         "Fos"          
 [33] "Gng7"          "Sh2d3c"        "Rarb"          "Ldlr"         
 [37] "Nexn"          "Serpina9"      "Pde10a"        "Six3os1"      
 [41] "4930432K21Rik" "Nuak1"         "Spred2"        "Dach1"        
 [45] "Gfod1"         "Numb"          "Rasgrp2"       "Smad3"        
 [49] "Adcy5"         "Cln6"          "Gnb4"          "Cwc25"        
 [53] "Cdyl2"         "Asic4"         "Naglu"         "Lrrc61"       
 [57] "Ecel1"         "Per2"          "Casp7"         "Dll4"         
 [61] "Rilpl1"        "Dusp4"         "Junb"          "Mccc2"        
 [65] "Schip1"        "Dnajc21"       "Ier2"          "Sik2"         
 [69] "Spata5"        "Hrh1"          "Shq1"          "Slco5a1"      
 [73] "Scn5a"         "Ofd1"          "Cbfa2t3"       "Rtn4ip1"      
 [77] "Ybey"          "St8sia5"       "Heyl"          "Mical2"       
 [81] "Ccdc112"       "Mamld1"        "3632451O06Rik" "Prag1"        
 [85] "Baz1a"         "Gpr149"        "Sgsm1"         "Rbm11"        
 [89] "Myadml2"       "B3galt6"       "Zfp459"        "Fam177a"      
 [93] "Fmnl1"         "Tdp1"          "Pfkfb3"        "Bhlhe40"      
 [97] "Igf2bp2"       "Osbpl3"        "Zfand3"        "Sco1"         
[101] "Ttc22"         "Slc37a1"       "Cc2d1b"        "Lrfn2"        
[105] "Lig4"          "Mfap3"         "Pdp1"          "Cd180"        
[109] "Ptgs2"         "Cnot4"         "Gpatch11"      "Bace2"        
[113] "Slc10a7"       "Plcl2"         "Ninl"          "Irf2"         
[117] "Bdnf"          "B4galt4"       "Mtrf1"         "Trim45"       
[121] "Trpm4"         "Mark3"         "Nfatc2"        "Ngfr"         
[125] "Nectin4"       "Lrrc49"        "4931415C17Rik" "Crem"         
[129] "Hspbap1"       "Csrnp2"        "Itga9"         "Ipmk"         
[133] "Zfyve1"        "Diexf"         "Dlk1"          "Magel2"       
[137] "Glrx2"         "Tipin"         "Phf21b"        "Ccnf"         
[141] "Trim68"        "Pim1"          "Arhgap39"      "Gramd4"       
[145] "Mme"           "Dusp16"        "Arl5c"         "Zfand4"       
[149] "Zc3h6"         "Tefm"          "C77080"        "Jun"          
[153] "Slc25a35"      "Fbxl4"         "Oacyl"         "Fam212b"      
[157] "Dnajc1"        "Ccdc6"         "Plekha3"       "Plppr1"       
[161] "Pxdn"          "Phf13"         "Grip1"         "Zfp804a"      
[165] "B3gat2"        "2810029C07Rik" "Nr1d1"         "Ilvbl"        
[169] "Armc5"         "E2f6"          "Dgkk"          "Fam222a"      
[173] "P4ha2"         "Synpo"         "Ptcd1"         "Frmd3"        
[177] "Ahdc1"         "Parn"          "Mcf2l"         "Kdm4d"        
[181] "Lhfp"          "Ddx27"         "Zmynd8"        "Gpr21"        
[185] "Slc22a3"       "Ehhadh"        "Hunk"          "Zfp941"       
[189] "Gpr83"         "Pms1"          "Dtwd2"         "Myo5b"        
[193] "Prr5"          "Chaf1a"        "Slc35f4"       "Plin4"        
[197] "Mmp24"         "Lzts1"         "Gpr1"          "Pik3c3"       
[201] "Gm20878"       "Ankrd33b"      "Snap29"        "Rnf165"       
[205] "Rin3"          "Vipr1"         "Gm5083"        "Ddx50"        
[209] "Pla2g4e"       "Zfp366"        "Prkag2"        "Abcb1b"       
[213] "Sp9"           "Lrrc56"        "Rars2"         "Ccdc103"      
[217] "Tcerg1l"       "E130102H24Rik" "Mapk4"         "Ankra2"       
[221] "Fzd1"          "Ppm1d"         "Rassf1"        "Cacng4"       
[225] "Cables1"       "Gpr68"         "Uhrf2"         "Mall"         
[229] "Epm2a"         "Map3k21"       "Ttc34"         "LOC100862446" 
[233] "Gpr137b"       "Homer1"        "Fam117a"       "Cbln2"        
[237] "Ccdc183"       "P4ha1"         "Usb1"          "Plekhg5"      
[241] "Mkl1"          "Abhd6"         "Ubox5"         "Prkab1"       
[245] "Irs2"          "Dusp1"         "Mak"           "Kbtbd2"       
[249] "Bend3"         "Samd3"         "Fam53b"        "Zmym1"        
[253] "Tigd2"         "Mapk14"        "Zbtb40"        "Camk1g"       
[257] "Rhobtb2"       "Lsm11"         "Ephb2"         "St7"          
[261] "Rhob"          "1700029J07Rik" "6430571L13Rik" "Rnd3"         
[265] "Pop1"          "Dio2"          "Arhgef7"       "Cd276"        
[269] "Rasd1"         "Blm"           "Trim62"        "Asap2"        
[273] "Cftr"          "Omg"           "Wisp1"         "Ift52"        
[277] "Ptpn1"         "Gabrq"         "Trim17"        "Synpr"        
[281] "Lss"           "Actn2"         "Meis1"         "Galnt7"       
[285] "Rbm43"         "Pcsk9"         "Wdcp"          "Ammecr1"      
[289] "Mta2"          "Jade2"         "Dbr1"          "Prickle1"     
[293] "Dhx32"         "Rac2"          "4930513N10Rik" "Haus3"        
[297] "Gpt"           "Dusp14"        "Sgms1"         "Strip2"       
[301] "Pafah2"        "Serinc2"       "Cdh9"          "Sema4c"       
[305] "Adamts3"       "Hmox1"         "Gk5"           "Apold1"       
[309] "Morc2b"        "Dars"          "Nadk"          "Cpne2"        
[313] "Runx2"         "Fbn1"          "Cd302"         "Adamts1"      
[317] "Ubxn10"        "Xbp1"          "Pcp4l1"        "Bcor"         
[321] "Fam171a1"      "Nub1"          "Anxa3"         "Bbs2"         
[325] "Gart"          "Lrsam1"        "Sdr42e1"       "Zfp438"       
[329] "Spry2"         "Wrb"           "Traf3"         "Nr4a1"        
[333] "Nrde2"         "Mfhas1"        "Gjb6"          "Zfp180"       
[337] "Mtmr4"         "Ppp2r5a"       "Sdhaf3"        "Sh2d5"        
[341] "Zfp791"        "Rfesd"         "Arhgef3"       "Pcdh20"       
[345] "Tmcc1"         "Zswim4"        "Nipal2"        "Tbx1"         
[349] "Rfwd2"         "Hcfc2"         "Nprl3"         "Zfp316"       
[353] "Slfn2"         "Usp43"         "Bbs9"          "Ppp1r13l"     
[357] "Ccdc134"       "Pxmp4"         "Man2b2"        "Cfap70"       
[361] "Zkscan6"       "Sgcd"          "Zmat1"         "Zc3h3"        
[365] "Lgals12"       "Sh3rf3"        "Etv5"          "Oxnad1"       
[369] "Pgbd5"         "Zfp277"        "Ddx59"         "Rassf6"       
[373] "Nol10"         "Dusp7"         "Pid1"          "Nxpe4"        
[377] "Acin1"         "Mmachc"        "Ptprr"         "Rimkla"       
[381] "Wdsub1"        "P2ry2"         "Fam221a"       "Pipox"        
[385] "Sord"          "Furin"         "Cdkal1"        "Etv6"         
[389] "Creb5"         "Kirrel3"       "Fstl3"         "Nos3"         
[393] "Cdh4"          "Ufd1"          "Mocs1"         "Rundc1"       
[397] "Col11a1"       "Ndufaf4"       "Il21r"         "Tesk2"        
[401] "Pdyn"          "Nt5dc1"        "Gemin8"        "Cd36"         
[405] "Gm16299"       "Srpk3"         "Gm11696"       "Gldn"         
[409] "4933407L21Rik" "Spred3"        "Pex2"          "Trak1"        
[413] "Ifit1bl1"      "Mms22l"        "Clspn"         "Fra10ac1"     
[417] "A130010J15Rik" "Rrs1"          "Il17ra"        "E2f4"         
[421] "Dnajb5"        "Eif4b"         "Cbx8"          "Cyr61"        
[425] "Iqcc"          "Kcna5"         "Mus81"         "Ptpn12"       
[429] "Plpbp"         "Fam46a"        "Znrf1"         "1110032F04Rik"
[433] "Ryk"           "P4ha3"         "Scube3"        "Gtf2ird2"     
[437] "Klf11"         "Kifc1"         "Zdhhc15"       "2310057M21Rik"
[441] "Etfrf1"        "Mpp6"          "Cyb5d2"        "Socs3"        
[445] "Heatr1"        "Bend7"         "D10Wsu102e"    "Gm9776"       
[449] "Ccdc14"        "Ttc38"         "Gm14204"       "Tbx3"         
[453] "Tpbgl"         "Mis12"         "Fxr1"          "Ccno"         
[457] "BC048403"      "Zfp810"        "Fam196a"       "Prkd2"        
[461] "Papd7"         "Slc9a8"        "Tpm1"          "Filip1"       
[465] "Dnd1"          "Rtn4rl2"       "Top1"          "Slc9a5"       
[469] "Trim26"        "Rassf5"        "Tex10"         "Agk"          
[473] "Casp9"         "Wnt9a"         "Meltf"         "Sh3yl1"       
[477] "Rerg"          "Scn4b"         "Ak4"           "Srarp"        
[481] "Zbed4"         "Rasip1"        "Cacna2d2"      "Evpl"         
[485] "Ccl28"         "Kcne1l"        "Adnp2"         "Tmem241"      
[489] "Lfng"          "Trpc7"         "Max"           "Zbtb25"       
[493] "Mxi1"          "Fam241a"       "Nfam1"         "H1f0"         
[497] "Met"           "Hpdl"          "Inafm2"        "Rwdd3"        
[501] "Qtrt1"         "Klhdc1"        "Mtcl1"         "Prkch"        
[505] "Spred1"        "1110008L16Rik" "Kcna4"         "Dpf2"         
[509] "Ppp6r1"        "1700003D09Rik" "N6amt1"        "Snx2"         
[513] "Tob2"          "Irs4"          "Csmd2"         "Dph6"         
[517] "Tyw3"          "Sp2"           "Gramd1b"       "Ccdc181"      
[521] "Gfpt2"         "Slc16a6"       "Unc13c"        "Ccl6"         
[525] "Ccnd2"         "Prpf3"         "Cmtm3"         "Zhx2"         
[529] "Trim9"         "Adprm"         "Rflnb"         "Aco1"         
[533] "BC030500"      "Mfsd4a"        "Zc3h8"         "Sox18"        
[537] "Rps6ka5"       "Slc9a9"        "Sapcd2"        "Atpaf1"       
[541] "Vgf"           "Brca2"         "Hic1"          "Rft1"         
[545] "Slc35g2"       "Cbwd1"         "Lin9"          "Hic2"         
[549] "Tmem267"       "Bcl2"          "Btg2"          "Unc45a"       
[553] "Noxred1"       "Dclk2"         "Flcn"          "E130317F20Rik"
[557] "Moap1"         "Mettl2"        "Cdkn1a"        "B4galt5"      
[561] "C530005A16Rik" "Rin2"          "Kansl2"        "Ap4b1"        
[565] "Cyb5rl"        "1700019D03Rik" "Nmb"           "Utp18"        
[569] "B3gntl1"       "Gimap1"        "Mamstr"        "Cutc"         
[573] "LOC100861615"  "Ephb3"         "Zfp119a"       "Fbxo16"       
[577] "Zscan18"       "Ubc"           "Nr6a1"         "Araf"         
[581] "Mlip"          "Arhgap31"      "Cdc42ep3"      "Smim3"        
[585] "Uspl1"         "Sept9"         "Zbtb16"        "Ece1"         
[589] "Bcl6"          "Zfp202"        "Tspan14"       "Bloc1s5"      
[593] "Arpin"         "Wdr25"         "Zbtb17"        "Riox2"        
[597] "Lrrc8d"        "9630001P10Rik" "Rabl3"         "Spry4"        
[601] "Fndc8"         "Cbx4"          "Lhx2"          "Zfp512"       
[605] "Npbwr1"        "Pdia4"         "Baiap3"        "Nlrx1"        
[609] "Ttc26"         "Uvssa"         "Socs2"        

$duration8wks
  [1] "Gpr88"         "Ido1"          "Adck2"         "Fos"          
  [5] "Sh2d3c"        "Ldlr"          "Pde10a"        "4930432K21Rik"
  [9] "Smad3"         "Cwc25"         "Lrrc61"        "Per2"         
 [13] "Junb"          "Ier2"          "Ofd1"          "Rtn4ip1"      
 [17] "Ybey"          "Ccdc112"       "3632451O06Rik" "Rbm11"        
 [21] "Myadml2"       "B3galt6"       "Zfp459"        "Fam177a"      
 [25] "Igf2bp2"       "Osbpl3"        "Sco1"          "Slc37a1"      
 [29] "Lpcat2"        "Gpatch11"      "Bace2"         "Fam111a"      
 [33] "Slc10a7"       "S1pr2"         "Ninl"          "Mtrf1"        
 [37] "Trim45"        "Lrrc49"        "Csrnp2"        "Pdzd9"        
 [41] "Diexf"         "Glrx2"         "Tipin"         "Phf21b"       
 [45] "Arhgap39"      "Insig1"        "Arl5c"         "Tefm"         
 [49] "Fbxl4"         "Mex3d"         "A930017K11Rik" "Plekha3"      
 [53] "Plppr1"        "Phf13"         "2810029C07Rik" "Htr6"         
 [57] "Ciart"         "Frmd3"         "Parn"          "Anapc4"       
 [61] "Ddx27"         "Pms1"          "Dtwd2"         "Prr5"         
 [65] "Chaf1a"        "Slc35f4"       "Plin4"         "Pik3c3"       
 [69] "Rin3"          "Gm5083"        "Zfp366"        "Rars2"        
 [73] "Ifit1"         "Tcerg1l"       "E130102H24Rik" "Zfp930"       
 [77] "Fzd1"          "Ppm1d"         "Gtf2h1"        "Pou6f2"       
 [81] "Epm2a"         "A430108G06Rik" "Ttc34"         "Ccdc183"      
 [85] "Usb1"          "Dusp1"         "Mak"           "Samd3"        
 [89] "Tigd2"         "Rnd3"          "Cd276"         "Trim62"       
 [93] "Prdm4"         "Omg"           "Ift52"         "Tiparp"       
 [97] "Rbm43"         "Mta2"          "Dhx32"         "Rac2"         
[101] "4930513N10Rik" "Gpt"           "Sgms1"         "Plekho2"      
[105] "Cog6"          "Pum3"          "Morc2b"        "Dars"         
[109] "Scpep1"        "Dnttip2"       "Prss57"        "Nrap"         
[113] "Nub1"          "Anxa3"         "Gart"          "Sdr42e1"      
[117] "Wrb"           "Gm14440"       "Traf3"         "Nr4a1"        
[121] "Gjb6"          "Sdhaf3"        "Tmcc1"         "Nipal2"       
[125] "Egr2"          "Pak7"          "Bbs9"          "Pxmp4"        
[129] "Zkscan6"       "Exosc3"        "Gspt2"         "Oxnad1"       
[133] "Zfp277"        "Actr8"         "Arhgap27"      "Serpine1"     
[137] "Smu1"          "St7l"          "P2ry2"         "Golt1b"       
[141] "Cdkal1"        "Adcyap1"       "Il21r"         "Pdyn"         
[145] "Eif4e2"        "6430550D23Rik" "Spred3"        "Mybpc1"       
[149] "Ifit1bl1"      "Fra10ac1"      "Iqgap2"        "Eif4b"        
[153] "Pdhx"          "Cbx8"          "Kcna5"         "Znrf1"        
[157] "1110032F04Rik" "Hspa9"         "Rabgef1"       "Ggnbp2"       
[161] "Zdhhc15"       "Hvcn1"         "Cyb5d2"        "Ddx10"        
[165] "Tbx3"          "Rbm4"          "Notch4"        "Arntl"        
[169] "Tpm1"          "Xlr3a"         "Trit1"         "Agk"          
[173] "Rad17"         "Casp9"         "Wnt9a"         "Sh3yl1"       
[177] "Hnrnpa1"       "Zbed4"         "Rasip1"        "Adnp2"        
[181] "Gm12522"       "Lfng"          "Loxl3"         "Ell"          
[185] "Nfam1"         "H1f0"          "Met"           "Klhdc1"       
[189] "Eed"           "Zfp874b"       "Snx2"          "Capn11"       
[193] "Ccdc181"       "Ccl6"          "Nt5c1a"        "Cmtm3"        
[197] "BC030500"      "Zc3h8"         "Sox18"         "Sapcd2"       
[201] "Atpaf1"        "Slc35g2"       "Wdr92"         "Cbwd1"        
[205] "Hic2"          "Noxred1"       "Moap1"         "Fmo2"         
[209] "Mettl2"        "Atg14"         "C530005A16Rik" "Ap4b1"        
[213] "Vps37b"        "Ing2"          "Sema5b"        "Cutc"         
[217] "LOC100861615"  "Zscan18"       "Ubc"           "Nop14"        
[221] "Araf"          "Mlip"          "Aspa"          "Zfp7"         
[225] "Bloc1s5"       "Cluap1"        "Arpin"         "Riox2"        
[229] "Rabl3"         "Lhx2"          "Chsy1"         "Zfp512"       
# To classify genes in more detail, we collapsed the binarized matrix gene by gene and grouped genes from the unique collapsed patterns.
head(stat_binary)
colnames(stat_binary) <- c("Intercept", "duration4wks", "duration6wks", "duration8wks")
#stat_binary <- stat_binary %>% add_column(Symbol=stat_table_all$Symbol, .before=1)
stat_binary <- stat_binary %>% mutate(pattern= paste(Intercept,duration4wks,duration6wks, duration8wks ,sep = "_"))
pattern_TF <- table(stat_binary$pattern) # %>% as.data.frame()
stat_binary.gather <- stat_binary %>% gather(key=coeff_group, value=value, -pattern)
stat_binary.gather$coeff_group <- factor(stat_binary.gather$coeff_group, 
                                         levels = c("Intercept","duration4wks", "duration6wks", "duration8wks"),
                                         ordered = TRUE)

ggplot(stat_binary.gather, aes(x = coeff_group, y = pattern, fill = value)) + geom_tile(colour = "white") +
  theme_bw() + xlab("") + ylab("") + coord_flip() +
  # scale_x_discrete(labels = c("Intercept", "duration4wks", "duration6wks", "duration8wks")) +
  scale_y_discrete(labels = pattern_TF) +
  scale_fill_manual(values = c("grey80", "firebrick1")) 

Hierarchical Clustering of all the samples based on Top 100 genes from GLM

  1. heatmap with ordering the sample by their treatment
  2. heatmap without ordering the sample (column)
# heatmap 
logCPM <- log.cpm
rownames(logCPM) <- y$genes$Symbol
colnames(logCPM) <- data.design$sample_short # get it into the y project
logCPM<- logCPM[stat_table_all$Symbol[1:100],]
      # get rid of the running group
logCPM<- t(scale(t(logCPM))) %>% as.data.frame() %>% as.matrix()
logCPM.order <- logCPM[, colnames(logCPM) %>% order()]

library(gplots)
#heatmap, columns ordered by their group/treatment
col.pan <- colorpanel(100, "blue", "white", "red")
heatmap.2(logCPM.order, col=col.pan, Rowv=TRUE, Colv= FALSE, scale="none", 
          trace="none", dendrogram="row", cexRow=0.5, cexCol=0.7, density.info="none",
          margin=c(9,4), lhei=c(2,10), lwid=c(2,6))

#heatmap, columns ordered by the clustering
col.pan <- colorpanel(100, "blue", "white", "red")
heatmap.2(logCPM.order, col=col.pan, Rowv=TRUE, scale="none", 
          trace="none", dendrogram="both", cexRow=0.5, cexCol=0.7, density.info="none",
          margin=c(9,4), lhei=c(2,10), lwid=c(2,6))

GO and Kegg pathway analysis

## ---- GO and Kegg pathway analysis
# Xulong's function
source("~/Documents/code/01_function/Kegg_function.R")
gk.diet <- myGK(stat_table_all$Symbol)
head(gk.diet$BP[, c("Term", "Pvalue")])
head(gk.diet$MF[, c("Term", "Pvalue")])
head(gk.diet$CC[, c("Term", "Pvalue")])
gk.diet$KEGG[, c("Term", "Pvalue")]
# output the analysis result
#file_name<- list("./analysis/Cortex_BP.txt", "./analysis/Cortex_MF.txt", "./analysis/Cortext_CC.txt", "./analysis/Cortext_KEGG.txt")
#walk2(gk.diet, file_name, write.table,  sep="\t", row=F, quote=F)
#write.table(stat_table_all, "./analysis/diet_gene.txt", sep="\t", row=F, quote=F)
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: OS X El Capitan 10.11.6

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
 [1] grid      parallel  stats4    stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] GO.db_3.5.0          pathview_1.18.2      org.Hs.eg.db_3.5.0  
 [4] org.Mm.eg.db_3.5.0   KEGG.db_3.2.3        GOstats_2.44.0      
 [7] graph_1.56.0         Category_2.44.0      Matrix_1.2-12       
[10] AnnotationDbi_1.40.0 VennDiagram_1.6.19   futile.logger_1.4.3 
[13] gplots_3.0.1         corrplot_0.84        IRanges_2.12.0      
[16] S4Vectors_0.16.0     Biobase_2.38.0       BiocGenerics_0.24.0 
[19] edgeR_3.20.9         limma_3.34.9         bindrcpp_0.2        
[22] forcats_0.3.0        stringr_1.3.0        dplyr_0.7.4         
[25] purrr_0.2.4          readr_1.1.1          tidyr_0.8.0         
[28] tibble_1.4.2         ggplot2_2.2.1        tidyverse_1.2.1     
[31] knitr_1.20           BiocStyle_2.6.1     

loaded via a namespace (and not attached):
 [1] nlme_3.1-131           bitops_1.0-6           lubridate_1.7.3       
 [4] bit64_0.9-7            httr_1.3.1             rprojroot_1.3-2       
 [7] Rgraphviz_2.22.0       tools_3.4.3            backports_1.1.2       
[10] R6_2.2.2               KernSmooth_2.23-15     DBI_0.8               
[13] lazyeval_0.2.1         colorspace_1.3-2       tidyselect_0.2.4      
[16] mnormt_1.5-5           bit_1.1-12             compiler_3.4.3        
[19] cli_1.0.0              rvest_0.3.2            xml2_1.2.0            
[22] labeling_0.3           KEGGgraph_1.38.1       caTools_1.17.1        
[25] scales_0.5.0           psych_1.7.8            genefilter_1.60.0     
[28] RBGL_1.54.0            digest_0.6.15          foreign_0.8-69        
[31] rmarkdown_1.9          XVector_0.18.0         AnnotationForge_1.20.0
[34] pkgconfig_2.0.1        htmltools_0.3.6        rlang_0.2.0           
[37] readxl_1.0.0           rstudioapi_0.7         RSQLite_2.0           
[40] bindr_0.1.1            jsonlite_1.5           gtools_3.5.0          
[43] RCurl_1.95-4.10        magrittr_1.5           Rcpp_0.12.16          
[46] munsell_0.4.3          stringi_1.1.7          yaml_2.1.18           
[49] zlibbioc_1.24.0        plyr_1.8.4             blob_1.1.0            
[52] gdata_2.18.0           crayon_1.3.4           lattice_0.20-35       
[55] Biostrings_2.46.0      haven_1.1.1            splines_3.4.3         
[58] annotate_1.56.1        KEGGREST_1.18.1        hms_0.4.2             
[61] locfit_1.5-9.1         pillar_1.2.1           reshape2_1.4.3        
[64] futile.options_1.0.0   XML_3.98-1.10          glue_1.2.0            
[67] evaluate_0.10.1        lambda.r_1.2           modelr_0.1.1          
[70] png_0.1-7              cellranger_1.1.0       gtable_0.2.0          
[73] assertthat_0.2.0       xtable_1.8-2           broom_0.4.3           
[76] survival_2.41-3        memoise_1.1.0          statmod_1.4.30        
[79] GSEABase_1.40.1